;------------------------------------------------------------------------------
;
; Copyright (c) 2022, Intel Corporation. All rights reserved.<BR>
; SPDX-License-Identifier: BSD-2-Clause-Patent
;
; Abstract:
;
;   Provide macro for register save/restore using SSE registers
;
;------------------------------------------------------------------------------

;
; Define SSE and AVX instruction set
;
;
; Define SSE macros using SSE 4.1 instructions
; args 1:XMM, 2:IDX, 3:REG
;
%macro SXMMN 3
    ; arg 1 = destination XMM register
    ; arg 2 = qword slot index (masked with 3 before use as the immediate)
    ; arg 3 = 64-bit general purpose register supplying the value
    ; NOTE(review): an XMM register holds only two qwords, so slot indices
    ; other than 0 and 1 are presumably never intended - confirm with callers.
    pinsrq  %1, %3, (%2 & 3)
%endmacro

;
; args 1:XMM, 2:REG, 3:IDX
;
%macro LXMMN 3
    ; arg 1 = source XMM register
    ; arg 2 = 64-bit general purpose register receiving the value
    ; arg 3 = qword slot index (masked with 3 before use as the immediate)
    pextrq  %2, %1, (%3 & 3)
%endmacro

;
; Define AVX macros using AVX instructions
; Save XMM to YMM
; args 1:YMM, 2:IDX (0 - lower 128bits, 1 - upper 128bits), 3:XMM
;
%macro SYMMN 3
    ; arg 1 = YMM register updated in place (used as both source and destination)
    ; arg 2 = 128-bit half selector (0 = lower, 1 = upper)
    ; arg 3 = XMM register whose contents are inserted into the selected half
    vinsertf128 %1, %1, %3, %2
%endmacro

;
; Restore XMM from YMM
; args 1:YMM, 2:XMM, 3:IDX (0 - lower 128bits, 1 - upper 128bits)
;
%macro LYMMN 3
    ; arg 1 = source YMM register
    ; arg 2 = XMM register receiving the selected 128-bit half
    ; arg 3 = 128-bit half selector (0 = lower, 1 = upper)
    vextractf128 %2, %1, %3
%endmacro

;
; Upper half of YMM7 to save RBP and RBX. Upper half of YMM8 to save RSI and RDI.
; Modified: XMM5, YMM6, YMM7 and YMM8
;
%macro SAVE_REGS 0
    ; Pack RBX:RBP into XMM5 and store it as the upper half of YMM7.
    ; Both qword slots of XMM5 are overwritten, so the old upper half of
    ; YMM7 is replaced entirely.
    SXMMN   xmm5, 1, rbx            ; slot 1 -> YMM7[192:255]
    SXMMN   xmm5, 0, rbp            ; slot 0 -> YMM7[128:191]
    SYMMN   ymm7, 1, xmm5

    ; Pack RDI:RSI into XMM5 and store it as the upper half of YMM8.
    SXMMN   xmm5, 1, rdi            ; slot 1 -> YMM8[192:255]
    SXMMN   xmm5, 0, rsi            ; slot 0 -> YMM8[128:191]
    SYMMN   ymm8, 1, xmm5

    ; RSP goes into YMM6 via its dedicated macro (leaves XMM5 = upper YMM6).
    SAVE_RSP
%endmacro

;
; Upper half of YMM7 to restore RBP and RBX. Upper half of YMM8 to restore RSI and RDI.
; Modified: XMM5, RBP, RBX, RSI, RDI and RSP
;
%macro LOAD_REGS 0
    ; Upper half of YMM7 holds RBX (slot 1) and RBP (slot 0).
    LYMMN   ymm7, xmm5, 1
    LXMMN   xmm5, rbx, 1
    LXMMN   xmm5, rbp, 0

    ; Upper half of YMM8 holds RDI (slot 1) and RSI (slot 0).
    LYMMN   ymm8, xmm5, 1
    LXMMN   xmm5, rdi, 1
    LXMMN   xmm5, rsi, 0

    ; RSP comes back from YMM6 via its dedicated macro.
    LOAD_RSP
%endmacro
;
; Restore RBP from YMM7[128:191]
; Modified: XMM5 and RBP
;
%macro LOAD_RBP 0
    LYMMN   ymm7, xmm5, 1           ; xmm5 = YMM7[128:255]
    movq    rbp, xmm5               ; rbp  = low qword of xmm5 = YMM7[128:191]
%endmacro

;
; Restore RBX from YMM7[192:255]
; Modified: XMM5 and RBX
;
%macro LOAD_RBX 0
    LYMMN   ymm7, xmm5, 1           ; xmm5 = YMM7[128:255]
    LXMMN   xmm5, rbx, 1            ; rbx  = high qword of xmm5 = YMM7[192:255]
%endmacro

;
; Upper half of YMM6 to save/restore Time Stamp, RSP
;
;
; Save Time Stamp to YMM6[192:255]
; arg 1:general purpose register which holds time stamp
; Modified: XMM5 and YMM6
;
%macro SAVE_TS 1
    ; Read-modify-write of the upper half of YMM6 so that the other
    ; qword slot (YMM6[128:191]) keeps its current contents.
    LYMMN   ymm6, xmm5, 1           ; xmm5 = YMM6[128:255]
    SXMMN   xmm5, 1, %1             ; replace slot 1 with the time stamp
    SYMMN   ymm6, 1, xmm5           ; YMM6[128:255] = xmm5
%endmacro

;
; Restore Time Stamp from YMM6[192:255]
; arg 1:general purpose register where to save time stamp
; Modified: XMM5 and %1
;
%macro LOAD_TS 1
    LYMMN   ymm6, xmm5, 1           ; xmm5 = YMM6[128:255]
    LXMMN   xmm5, %1, 1             ; arg 1 = slot 1 = YMM6[192:255]
%endmacro

;
; Save RSP to YMM6[128:191]
; Modified: XMM5 and YMM6
;
%macro SAVE_RSP 0
    ; Read-modify-write of the upper half of YMM6 so that the other
    ; qword slot (YMM6[192:255]) keeps its current contents.
    LYMMN   ymm6, xmm5, 1           ; xmm5 = YMM6[128:255]
    SXMMN   xmm5, 0, rsp            ; replace slot 0 with RSP
    SYMMN   ymm6, 1, xmm5           ; YMM6[128:255] = xmm5
%endmacro

;
; Restore RSP from YMM6[128:191]
; Modified: XMM5 and RSP
;
%macro LOAD_RSP 0
    LYMMN   ymm6, xmm5, 1           ; xmm5 = YMM6[128:255]
    movq    rsp, xmm5               ; rsp  = low qword of xmm5 = YMM6[128:191]
%endmacro

;
; Upper half of YMM9 to save/restore UCODE status, BFV address
;
;
; Save uCode status to YMM9[128:191]
; arg 1:general purpose register which holds uCode status
; Modified: XMM5 and YMM9
;
%macro SAVE_UCODE_STATUS 1
    ; Read-modify-write of the upper half of YMM9 so that the other
    ; qword slot (YMM9[192:255]) keeps its current contents.
    LYMMN   ymm9, xmm5, 1           ; xmm5 = YMM9[128:255]
    SXMMN   xmm5, 0, %1             ; replace slot 0 (YMM9[128:191]) with arg 1
    SYMMN   ymm9, 1, xmm5           ; YMM9[128:255] = xmm5
%endmacro

;
; Restore uCode status from YMM9[128:191]
; arg 1:general purpose register where to save uCode status
; Modified: XMM5 and %1
;
%macro LOAD_UCODE_STATUS 1
    LYMMN   ymm9, xmm5, 1           ; xmm5  = YMM9[128:255]
    movq    %1, xmm5                ; arg 1 = low qword of xmm5 = YMM9[128:191]
%endmacro

;
; Save BFV address to YMM9[192:255]
; arg 1:general purpose register which holds BFV address
; Modified: XMM5 and YMM9
;
%macro SAVE_BFV 1
    ; Read-modify-write of the upper half of YMM9 so that the other
    ; qword slot (YMM9[128:191]) keeps its current contents.
    LYMMN   ymm9, xmm5, 1           ; xmm5 = YMM9[128:255]
    SXMMN   xmm5, 1, %1             ; replace slot 1 (YMM9[192:255]) with arg 1
    SYMMN   ymm9, 1, xmm5           ; YMM9[128:255] = xmm5
%endmacro

;
; Restore BFV address from YMM9[192:255]
; arg 1:general purpose register where to save BFV address
; Modified: XMM5 and %1
;
%macro LOAD_BFV 1
    LYMMN   ymm9, xmm5, 1           ; xmm5  = YMM9[128:255]
    LXMMN   xmm5, %1, 1             ; arg 1 = high qword of xmm5 = YMM9[192:255]
%endmacro

;
; Upper half of YMM10 to save/restore RCX
;
;
; Save RCX to YMM10[128:191]
; Modified: XMM5 and YMM10
;

%macro SAVE_RCX 0
    ; Read-modify-write of the upper half of YMM10 so that the other
    ; qword slot (YMM10[192:255]) keeps its current contents.
    LYMMN   ymm10, xmm5, 1          ; xmm5 = YMM10[128:255]
    SXMMN   xmm5, 0, rcx            ; replace slot 0 with RCX
    SYMMN   ymm10, 1, xmm5          ; YMM10[128:255] = xmm5
%endmacro

;
; Restore RCX from YMM10[128:191]
; Modified: XMM5 and RCX
;

%macro LOAD_RCX 0
    LYMMN   ymm10, xmm5, 1          ; xmm5 = YMM10[128:255]
    movq    rcx, xmm5               ; rcx  = low qword of xmm5 = YMM10[128:191]
%endmacro

;
; Save TemporaryRamSize to YMM10[192:255]
; arg 1:general purpose register which holds TemporaryRamSize
; Modified: XMM5 and YMM10[192:255]
;
%macro SAVE_TEMPORARY_RAM_SIZE 1
    ; Read-modify-write of the upper half of YMM10 so that the other
    ; qword slot (YMM10[128:191]) keeps its current contents.
    LYMMN   ymm10, xmm5, 1          ; xmm5 = YMM10[128:255]
    SXMMN   xmm5, 1, %1             ; replace slot 1 with arg 1
    SYMMN   ymm10, 1, xmm5          ; YMM10[128:255] = xmm5
%endmacro

;
; Restore TemporaryRamSize from YMM10[192:255]
; arg 1:general purpose register where to save TemporaryRamSize
; Modified: XMM5 and %1
;
%macro LOAD_TEMPORARY_RAM_SIZE 1
    LYMMN   ymm10, xmm5, 1          ; xmm5  = YMM10[128:255]
    LXMMN   xmm5, %1, 1             ; arg 1 = high qword of xmm5 = YMM10[192:255]
%endmacro

;
; YMM7[128:191] for calling stack
; arg 1:Entry
; Modified: RSI, XMM5, YMM7
;
%macro CALL_YMM 1
    ; Stack-less call: the return address is parked in YMM7[128:191] and
    ; control is transferred to arg 1 with an indirect jump through RSI.
    ; NOTE(review): SAVE_REGS stores RBP in this same YMM7[128:191] slot, so
    ; a CALL_YMM issued after SAVE_REGS overwrites the saved RBP - confirm
    ; that callers never need both values at once.
    LYMMN   ymm7, xmm5, 1           ; xmm5 = YMM7[128:255] (slot 1 is preserved)
    mov     rsi, %%ReturnAddress
    SXMMN   xmm5, 0, rsi            ; slot 0 = return address
    SYMMN   ymm7, 1, xmm5           ; YMM7[128:255] = xmm5
    mov     rsi, %1
    jmp     rsi
%%ReturnAddress:
%endmacro
;
; Restore RIP from YMM7[128:191]
; Modified: RSI, XMM5
;
%macro RET_YMM 0
    ; Counterpart of CALL_YMM: fetch the address parked in YMM7[128:191]
    ; and jump to it through RSI.
    LYMMN   ymm7, xmm5, 1           ; xmm5 = YMM7[128:255]
    movq    rsi, xmm5               ; rsi  = low qword of xmm5 = YMM7[128:191]
    jmp     rsi
%endmacro

%macro ENABLE_SSE   0
            ;
            ; Initialize the x87 FPU, verify SSE and SSE4.1 support through
            ; CPUID leaf 1, set CR4.OSFXSR and CR4.OSXMMEXCPT and load MXCSR.
            ; Execution spins forever in place when either feature is missing.
            ;
            ; Modified: RAX, RDX, R10, R11, flags, CR4, x87 control word, MXCSR
            ; Preserved: RBX and RCX (parked in R11 and R10 around CPUID)
            ;
            ; Every label below is macro-local, so the macro can be expanded
            ; more than once per file and cannot collide with, or break the
            ; local-label scope of, labels in the including file. The inline
            ; constants are addressed RIP-relative so that no absolute
            ; relocation is required.
            ;

            ;
            ; Initialize floating point units
            ;
            jmp     %%NextAddress
align 4
            ;
            ; Float control word initial value:
            ; all exceptions masked, double-precision, round-to-nearest
            ;
%%FpuControlWord:    DW      027Fh
            ;
            ; Multimedia-extensions control word (MXCSR):
            ; all exceptions masked, round-to-nearest; the flush-to-zero
            ; (bit 15) and denormals-are-zero (bit 6) bits are left clear.
            ; Only the low 32 bits of this quadword are read by ldmxcsr.
            ;
%%MmxControlWord:    DQ      01F80h
%%SseError:
            ;
            ; Processor has to support SSE
            ;
            jmp     %%SseError
%%NextAddress:
            finit
            lea     rax, [rel %%FpuControlWord]
            fldcw   [rax]

            ;
            ; Use CpuId instruction (CPUID.01H:EDX.SSE[bit 25] = 1) to test
            ; whether the processor supports SSE instruction.
            ;
            ; Save RBX to R11
            ; Save RCX to R10
            ;
            mov     r11, rbx
            mov     r10, rcx
            mov     eax, 1
            cpuid
            bt      edx, 25
            jnc     %%SseError

            ;
            ; SSE 4.1 support (CPUID.01H:ECX[bit 19]); required because the
            ; save/restore macros rely on pinsrq/pextrq
            ;
            bt      ecx, 19
            jnc     %%SseError
            ;
            ; Restore RBX from R11
            ; Restore RCX from R10
            ;
            mov     rbx, r11
            mov     rcx, r10

            ;
            ; Set OSFXSR bit (bit #9) & OSXMMEXCPT bit (bit #10)
            ;
            mov     rax, cr4
            or      rax, 00000600h
            mov     cr4, rax

            ;
            ; The processor should support SSE instruction and we can use
            ; ldmxcsr instruction
            ;
            lea     rax, [rel %%MmxControlWord]
            ldmxcsr [rax]
            %endmacro

%macro ENABLE_AVX   0
            ;
            ; Verify AVX support through CPUID leaf 1, set CR4.OSXSAVE and
            ; turn on the SSE and AVX state bits in XCR0.
            ; Execution spins forever in place when AVX is not reported.
            ;
            ; Modified: RAX, RDX, R10, R11, flags, CR4, XCR0
            ; Preserved: RBX and RCX (parked in R11 and R10)
            ;
            ; Both labels below are macro-local, so the macro can be expanded
            ; more than once per file and cannot collide with labels in the
            ; including file.
            ;

            ;
            ; Save RBX to R11
            ; Save RCX to R10
            ;
            mov     r11, rbx
            mov     r10, rcx
            mov     eax, 1
            cpuid
            bt      ecx, 28        ; check AVX feature flag (CPUID.01H:ECX[bit 28])
            jc      %%EnableAvx
%%AvxError:
            ;
            ; Processor has to support AVX
            ;
            jmp     %%AvxError
%%EnableAvx:
            ;
            ; Set OSXSAVE bit (bit #18) to enable xgetbv/xsetbv instruction
            ;
            mov     rax, cr4
            or      rax, 00040000h
            mov     cr4, rax

            xor     ecx, ecx       ; index 0 (XCR0)
            xgetbv                 ; result in edx:eax
            or      eax, 00000006h ; Set XCR0 bit #1 and bit #2 to enable SSE state and AVX state
            xsetbv
            ;
            ; Restore RBX from R11
            ; Restore RCX from R10
            ;
            mov     rbx, r11
            mov     rcx, r10
            %endmacro

